set more off
set type double
clear all

*remove small msas from analysis
use maindataset, clear

ge observation=_n
sort observation 

reshape long opcarrier segment, i(observation) j(seqnum)
drop if segment==""

sort market
merge market using rankmsamarket
keep if _merge==3
drop _merge
save postdemoselection, replace

local yearloop "1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016"
foreach y of local yearloop {

use postdemoselection, clear

keep if year==`y'

drop year quarter

keep market segment tkcarrier opcarrier mktpass_tkopcarrier 

********************************************************************************
* generate a dataset at the market level that identifies the competitors
********************************************************************************
*use temporarydata
preserve

keep market tkcarrier
duplicates drop

ge competitor=1

*loop through the list of majors, generate dummy variables for each major
*each line in dataset is a market, dummy variable for major =1 if they are competitor in that market 
levelsof tkcarrier, local(majors)
reshape wide competitor, i(market) j(tkcarrier) string
foreach x of local majors {
		replace competitor`x' = 0 if competitor`x' == .
}

sort market

save competitors, replace

restore

********************************************************************************
*Generate a dataset at the regional level that identifies which majors have information on that regional
********************************************************************************
* if there are 1s, it means that the two firms must use the regional in at least one market

*generate list of owned regionals (owned regionals cannot be/are not used by competing majors)

preserve

*bring in ownership data
use affiliations, clear

keep if year==`y'
keep if subsidiary_ofmajor=="Yes"
keep opcarrier code_ofmajor 
duplicates drop

save ownedregionals`y', replace

restore

*now, generate info matrix 
preserve
keep opcarrier tkcarrier
duplicates drop
ge info=1

*fix issue of majors being operating carriers for other majors from codesharing
levelsof tkcarrier, local(majors)
foreach x of local majors {
	replace info = 0 if opcarrier=="`x'" & tkcarrier != opcarrier
}

*fix issue of majors using other major's owned regionals from codesharing
merge m:1 opcarrier using ownedregionals`y'
drop if _merge == 2
erase ownedregionals`y'.dta
replace info = 0 if _merge == 3 & tkcarrier != code_ofmajor
drop _merge code_ofmajor

levelsof tkcarrier, local(majors)
reshape wide info, i(opcarrier) j(tkcarrier) string
foreach x of local majors {
		replace info`x' = 0 if info`x' == .
}

sort opcarrier
save opcarrierinfo, replace

restore

********************************************************************************
*generate passenger shares for op carriers 
********************************************************************************
use postdemoselection, clear

keep if year==`y'
drop year quarter

*this sums the total number of passenger trips in a market through all of the various routes
keep observation market tkcarrier mktpass_tkopcarrier
duplicates drop
bysort market tkcarrier: egen totalpassengers=sum(mktpass_tkopcarrier)
keep tkcarrier market totalpassengers
duplicates drop
save totaltkpassengers.dta, replace

*now determine the share of passengers carried by each operating carrier for a major in a market
use postdemoselection, clear

keep if year==`y'
drop year quarter

keep observation market opcarrier tkcarrier mktpass_tkopcarrier
duplicates drop
bysort market tkcarrier opcarrier: egen op_passengers=sum(mktpass_tkopcarrier)
keep tkcarrier market opcarrier op_passengers
duplicates drop
save regionalpassengers.dta, replace

********************************************************************************
*merge datasets on competitors and info
********************************************************************************

use postdemoselection, clear

keep if year==`y' 
drop year quarter

*merge in information about who are the competitors in each market
sort market
merge m:1 market using competitors 
drop _merge
erase competitors.dta

*merge in information about which majors have info on which regionals
merge m:1 opcarrier using opcarrierinfo
drop _merge
erase opcarrierinfo.dta
sort market observation

*at the regional, market level, determine whether a major is a competitor and whether it knows information about the given regional
levelsof tkcarrier, local(majors)
foreach x of local majors {
		ge competitorinfo`x'=competitor`x'*info`x'
		drop info`x'
}
foreach x of local majors {
		replace competitorinfo`x'=0 if tkcarrier=="`x'"
}

*now, need to calculate info shares at the observation (route-market-tkcarrier) level

*first, bring in total market-tkcarrier level passenger totals
merge m:1 tkcarrier market using totaltkpassengers
drop _merge
sort observation tkcarrier opcarrier

*need competitor*info at the observation (route-market-tkcarrier) level
levelsof tkcarrier, local(majors)
foreach x of local majors {
	bysort observation: egen comp_info_route`x' = sum(competitorinfo`x')
	replace comp_info_route`x' = 1 if comp_info_route`x'>1 
	drop competitorinfo`x'
}

*now, bring to route-market-tkcarrier level (remove segment level data)
drop seqnum opcarrier segment 
duplicates drop

*now, measure the share that each route is of the total market for that tkcarrier
gen route_share = mktpass_tkopcarrier / totalpassengers

*reshape so that we have for each market, at the major, regional, competitor major level whether that competitor knows the regional. So these lines are pairwise (major-AA, competitor - DL for example)
reshape long competitor comp_info_route, i(tkcarrier observation market) j(competitorid) string
sort market tkcarrier observation competitorid

*drop the entries of competitors not in that market
drop if competitor==0
*only keep lines for competitors, not the own major
drop if tkcarrier==competitorid

*generate variable that captures how much total info a competitor has about a major across all the regionals used in the market
ge info_share_step = route_share * comp_info_route
bysort market tkcarrier competitorid: egen infoshares = sum(info_share_step)

*bring it to just major-competitor level. So for instance in a market with just AA and DL we will have an AA-DL entry and a DL-AA entry
keep tkcarrier market totalpassengers competitorid competitor comp_info_route infoshares 
duplicates drop

*create variable of major pairs to link with original EK work
egen standardcombo=concat(tkcarrier competitorid)

*want this to be competitor-pair, market level
*replace the competitorinfo variable 
drop comp_info_route 
gen competitorinfo = 1 if infoshares>0
replace competitorinfo = 0 if missing(competitorinfo)
*drop duplicates
duplicates drop
*now data is unique at the major-competitor, market level

*generate market shares for weighted average of csc
preserve
keep tkcarrier market totalpassengers
duplicates drop
bysort market: egen total_mkt_pass = sum(totalpassengers)
gen mkt_share = totalpassengers / total_mkt_pass
drop totalpassengers total_mkt_pass
save mkt_share_temp, replace
restore

merge m:1 tkcarrier market using mkt_share_temp
sort market tkcarrier competitorid
drop _merge
erase mkt_share_temp.dta

save datawithcompetitorinfo, replace

********************************************************************************
*gen variables at the tkcarrier/market/time level that record what is the average information in a market
********************************************************************************

use datawithcompetitorinfo, clear
drop competitorinfo competitor

*create common sub-contracting variable as interaction between all of the pairwise info measures
bysort market: egen csc_avg = mean(infoshares)

*generate a measure of csc by just the counts of overlap in each market
gen count = 1
bysort market: egen total_contacts = sum(count)
gen regional_contact_point = 0 
replace regional_contact_point = 1 if infoshares>0
bysort market: egen total_regional_contacts = sum(regional_contact_point)
gen csc_count_avg = total_regional_contacts / total_contacts

*generate a measure of csc weighted by the mkt share of each ticketing carrier
*first, generate a competitive mkt share variable
preserve
keep tkcarrier market mkt_share
duplicates drop
rename tkcarrier competitorid
rename mkt_share comp_mkt_share
save comp_mkt_share_temp, replace
restore
merge m:1 competitorid market using comp_mkt_share_temp
drop _merge
erase comp_mkt_share_temp.dta
sort market tkcarrier competitorid
*now generate the weighted average 
bysort market: egen sum_weights = sum(comp_mkt_share)
gen weighted_info_share = infoshares * comp_mkt_share
bysort market: egen csc_weighted_avg_step = sum(weighted_info_share)
gen csc_weighted_avg = csc_weighted_avg_step / sum_weights

drop tkcarrier 
rename competitorid tkcarrier
keep tkcarrier market csc_avg csc_weighted_avg csc_count_avg
duplicates drop

gen quarter = 2
gen year = `y'
save info_`y', replace

********************************************************************************
*Generate ek_mmc
********************************************************************************

use datawithcompetitorinfo, clear
erase datawithcompetitorinfo.dta

*This computes standard EK measure
bysort standardcombo: egen ek_sums = sum(competitor)
bysort market: egen EK_denom = sum(competitor)
bysort market: egen ek_num = sum(ek_sums)
gen mmc_ek = ek_num/EK_denom/1000

*bring it to market level and save 
keep market mmc_ek
duplicates drop

*save this data with a time index, and add year and quarter back
gen quarter=2
gen year= `y'
save mmc_allmeasures`y', replace

*erase used temp data
erase regionalpassengers.dta
erase totaltkpassengers.dta

}

********************************************************************************
*Merge all years results for Master Data Set
********************************************************************************

*First, append all mmc measures by year and quarter together
use mmc_allmeasures1998.dta, clear
local yearloop "1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016"
foreach y of local yearloop {
	append using mmc_allmeasures`y'
}
local yearloop "1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016"
foreach y of local yearloop {
	erase mmc_allmeasures`y'.dta
}

save mmc_allmeasures, replace

*merge mmc measures
use postdemoselection, clear
contract year quarter market
drop _freq

merge 1:1 year quarter market using mmc_allmeasures.dta
*no values constructed for monopoly markets so they will all be missing
erase mmc_allmeasures.dta
drop _merge

*Merge these measures back to main dataset
merge 1:m year quarter market using postdemoselection

drop _merge
sort year quarter market tkcarrier observation seqnum

preserve
*merge in the information on shared regionals
use info_1998.dta, clear
local yearloop "1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016"
foreach y of local yearloop {
	append using info_`y'
}
local yearloop "1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016"
foreach y of local yearloop {
	erase info_`y'.dta
}
save info_formerge, replace

restore

merge m:1 year quarter market tkcarrier using info_formerge
erase info_formerge.dta
*note monopoly markets don't merge as expected
drop _merge

*have all of the market-tkcarrier-time level data needed
keep year quarter market tkcarrier mmc_ek csc_avg csc_weighted_avg csc_count_avg
duplicates drop

save temp, replace

*merge this data back in with the route-level data

use maindataset, clear
ge observation=_n
merge m:1 year quarter tkcarrier market using temp
drop _merge
sort year quarter market tkcarrier observation 
erase temp.dta

*need to generate owned and unowned regional dummies 
save beforeloop, replace
use beforeloop, clear

local regionalloop "1 2 3"
foreach x of local regionalloop {
    
	preserve
	use affiliations.dta, clear
	rename opcarrier opcarrier`x'
	save temp`x', replace
	restore


*Merge in affiliations data (regional/major ownership)
merge m:1 year quarter opcarrier`x' using temp`x'
sleep 10
erase temp`x'.dta

*replace code_ofmajor with code if it is already a majors
replace code_ofmajor = tkcarrier if tkcarrier == opcarrier`x'

*rename the owned regional information variables
rename subsidiary_ofmajor subsidiary_ofmajor`x'
rename code_ofmajor code_ofmajor`x'

*drop observations from the affiliations data only that don't appear in the regression data
drop if _merge==2
drop _merge


*************************
*Generate regional dummy variables
gen regional`x' = opcarrier`x'
egen yr_tkcarrier`x' = concat(tkcarrier year)
egen yr_regcarrier`x' = concat(regional`x' year)
*remove from regional variables majors that show up as opcarriers because of codeshares
levelsof yr_tkcarrier`x', local(tkcarriers`x')
foreach t of local tkcarriers`x' {
	replace regional`x' = "" if yr_regcarrier`x' == "`t'"
	}

drop yr_tkcarrier`x'
drop yr_regcarrier`x'	
	
*generate dummy to capture whether the regional is owned or unowned
gen owned_regional`x'=0
replace owned_regional`x' = 1 if subsidiary_ofmajor`x'=="Yes"
drop subsidiary_ofmajor`x' code_ofmajor`x'
*generate unowned_regional only if a regional is being used
gen unowned_regional`x'=0
replace unowned_regional`x'=1 if owned_regional`x'==0 & regional`x' != ""	

*remove observations for "regionals" HA and CS. HA only has one observation and CS has two. Neither appear in the data as tkcarriers
drop if regional`x' == "HA" | regional`x' == "CS"	
	
}	

erase beforeloop.dta

egen oneowned = rmax(owned_regional1 owned_regional2 owned_regional3)
egen onenotowned = rmax (unowned_regional1 unowned_regional2 unowned_regional3)

*drop the airlines that rarely appear <2000 obs of 3.5 million obs
gen step = 1
bysort tkcarrier: egen total_obs = sum(step)
drop if total_obs < 2000
drop step total_obs

sort year quarter market tkcarrier
save regressionsdataset.dta, replace

********************************************************************************
*Create variables the measure the intensity of regional usage
*these will take the form of passengers * miles
********************************************************************************
local loop "1 2 3"
foreach x of local loop {
use distancecoupons.dta, clear

	*generate segment codes to merge into main dataset
	gen segment`x' = origincoup + "" + destcoup
	rename marketdistance sgmdistance`x'
	keep segment`x' sgmdistance`x'
	save distances_temp`x', replace
}

*merge with main data
use regressionsdataset.dta, clear

merge m:1 segment1 using distances_temp1
drop if _merge==2
drop _merge
erase distances_temp1.dta
merge m:1 segment2 using distances_temp2
drop if _merge==2
drop _merge
erase distances_temp2.dta
merge m:1 segment3 using distances_temp3
drop if _merge==2
drop _merge
erase distances_temp3.dta

*what is total distance that airline uses to serve market? (not the nonstop mileage)
replace sgmdistance2 = 0 if missing(sgmdistance2)
replace sgmdistance3 = 0 if missing(sgmdistance3)
gen total_distance = sgmdistance1 + sgmdistance2 + sgmdistance3

*generate total distance * passengers for each route
gen tot_tk_mktpassmiles = mktpass_tkopcarrier * total_distance

*generate variables for both unowned and owned regionals which calculate passenger*miles along the route
gen owned_regional_usage_step = owned_regional1*sgmdistance1*mktpass_tkopcarrier + owned_regional2*sgmdistance2*mktpass_tkopcarrier + owned_regional3*sgmdistance3*mktpass_tkopcarrier

gen unowned_regional_usage_step = unowned_regional1*sgmdistance1*mktpass_tkopcarrier + unowned_regional2*sgmdistance2*mktpass_tkopcarrier + unowned_regional3*sgmdistance3*mktpass_tkopcarrier

*generate usage which is fraction of passenger-miles transported by owned and unowned regionals
*this gives intensity of regional usage in each route
gen owned_regional_usage = owned_regional_usage_step / tot_tk_mktpassmiles
gen unowned_regional_usage = unowned_regional_usage_step / tot_tk_mktpassmiles

*drop temp variables
drop regional1 regional2 regional3 tot_tk_mktpassmiles owned_regional_usage_step unowned_regional_usage_step 

sort year quarter market tkcarrier

save regressionsdataset.dta, replace











